This document explores two datasets: one from 2016 covering 6 individuals and another from 2018 covering 4 individuals. For that purpose, we first need to load the weanlingNES package to access the data.
# Load the package that ships the elephant-seal datasets used throughout
library(weanlingNES)
# Load the bundled `data_nes` list (elements `year_2016` and `year_2018`)
data("data_nes", package = "weanlingNES")
# Alternative when working from the source tree instead of the installed package:
# load("../data/data_nes.rda")
Let’s have a look at what’s inside data_nes$year_2016:
# compact view of the list structure: one data.table per 2016 individual
str(data_nes$year_2016, max.level = 1, give.attr = F, no.list = T)
## $ ind_3449:Classes 'data.table' and 'data.frame': 384 obs. of 23 variables:
## $ ind_3450:Classes 'data.table' and 'data.frame': 253 obs. of 23 variables:
## $ ind_3456:Classes 'data.table' and 'data.frame': 253 obs. of 23 variables:
## $ ind_3457:Classes 'data.table' and 'data.frame': 253 obs. of 23 variables:
## $ ind_3460:Classes 'data.table' and 'data.frame': 253 obs. of 23 variables:
## $ ind_3463:Classes 'data.table' and 'data.frame': 213 obs. of 23 variables:
A list of 6
data.frames, one for each seal
For convenience, we aggregate all 6 individuals into one dataset.
# Combine all individuals into one data.table; `idcol = TRUE` adds a `.id`
# column holding each list element's name (the seal id).
# The argument is spelled out in full: the previous `use.name` only worked
# through partial argument matching.
data_2016 <- rbindlist(data_nes$year_2016, use.names = TRUE, idcol = TRUE)
# display a random sample of 100 rows in an interactive table
DT::datatable(data_2016[sample.int(.N, 100), ], options = list(scrollX = T))
# per-individual summary of the raw 2016 data
data_2016[, .(
# number of distinct calendar days with at least one record
nb_days_recorded = uniqueN(as.Date(date)),
max_depth = max(maxpress_dbars),
sst_mean = mean(sst2_c),
sst_sd = sd(sst2_c)
), by = .id] %>%
# `sable()` is a project helper for table rendering (kable-like)
sable(
caption = "Summary diving information relative to each 2016 individual",
digits = 2
)
| .id | nb_days_recorded | max_depth | sst_mean | sst_sd |
|---|---|---|---|---|
| ind_3449 | 384 | 1118.81 | 26.54 | 129.91 |
| ind_3450 | 253 | 954.81 | 130.29 | 367.69 |
| ind_3456 | 253 | 697.63 | 125.24 | 360.39 |
| ind_3457 | 253 | 572.94 | 135.56 | 374.71 |
| ind_3460 | 253 | 832.25 | 65.24 | 249.12 |
| ind_3463 | 213 | 648.81 | 212.19 | 462.88 |
Well, it seems that sst is a bit odd. Let’s have a look at its distribution.
# Histogram of the raw sea-surface temperature, one panel per seal;
# the fill colour doubles as the panel identity, so the legend is hidden.
ggplot(data_2016) +
  aes(x = sst2_c, fill = .id) +
  geom_histogram(show.legend = FALSE) +
  facet_wrap(vars(.id)) +
  theme_jjo()
Figure 1.1: Distribution of raw sst2 for the six individuals in 2016
Let’s remove any data with a sst2_c > 500.
# keep rows with plausible SST; note this also drops rows where sst2_c is NA
data_2016_filter <- data_2016[sst2_c < 500, ]
# same histogram on the filtered data
ggplot(data_2016_filter, aes(x = sst2_c, fill = .id)) +
geom_histogram(show.legend = FALSE) +
facet_wrap(.id ~ .) +
theme_jjo()
Figure 1.2: Distribution of filtered sst2 for the six individuals in 2016
Well, that seems to be much better! In the process of filtering out odd values, we removed 116 rows this way:
# Number of rows removed per individual. `>= 500` is the exact complement of
# the `sst2_c < 500` filter applied above (the previous `> 500` silently
# skipped rows exactly equal to 500).
data_2016[sst2_c >= 500, .(nb_row_removed = .N), by = .id] %>%
sable(caption = "# of rows removed by 2016-individuals")
| .id | nb_row_removed |
|---|---|
| ind_3449 | 4 |
| ind_3450 | 23 |
| ind_3456 | 22 |
| ind_3457 | 24 |
| ind_3460 | 10 |
| ind_3463 | 33 |
# maximum dive depth over time; black dots flag the sst2 outlier rows
ggplot(
data_2016,
aes(y = -maxpress_dbars, x = as.Date(date), col = .id)
) +
geom_path(show.legend = FALSE) +
# overlay the outlier rows (sst2_c > 500) on top of each track
geom_point(data = data_2016[sst2_c > 500, ], col = "black") +
scale_x_date(date_labels = "%m/%Y") +
labs(y = "Pressure (dbar)", x = "Date") +
facet_wrap(.id ~ .) +
theme_jjo() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Figure 1.3: Where and when the sst2 outliers occured
Well this latter plot highlights several points:
ind_3449 seems to have returned ashore twice during his track. Let’s see if we can double-check that, using a map.
# interactive map of ind_3449's track; red markers are the rows removed by
# the sst filter above
leaflet() %>%
setView(lng = -122, lat = 38, zoom = 2) %>%
addTiles() %>%
# track as a polyline
addPolylines(
lat = data_2016[.id == "ind_3449", latitude_degs],
lng = data_2016[.id == "ind_3449", longitude_degs],
weight = 2
) %>%
# positions of the filtered-out (sst2_c > 500) rows
addCircleMarkers(
lat = data_2016[.id == "ind_3449" & sst2_c > 500, latitude_degs],
lng = data_2016[.id == "ind_3449" & sst2_c > 500, longitude_degs],
radius = 3,
stroke = FALSE,
color = "red",
fillOpacity = 1
)
Figure 1.4: It is supposed to be the track of ind_3449… (red dots are the location of removed rows)
… these coordinates seem weird !
# summary of the coordinates by individual (gtsummary-style table)
data_2016[, .(.id, longitude_degs, latitude_degs)] %>%
tbl_summary(by = .id) %>%
modify_caption("Summary of `longitude_degree` and `latitude_degree`")
| Characteristic | ind_3449, N = 384¹ | ind_3450, N = 253¹ | ind_3456, N = 253¹ | ind_3457, N = 253¹ | ind_3460, N = 253¹ | ind_3463, N = 213¹ |
|---|---|---|---|---|---|---|
| longitude_degs | -119 (-132, -69) | -124 (-144, -99) | -112 (-134, -6) | -122 (-132, -97) | -122 (-144, -85) | -121 (-134, -93) |
| latitude_degs | 39 (-67, 68) | 60 (-63, 68) | 56 (-63, 68) | 44 (-63, 68) | 59 (-63, 68) | 63 (42, 72) |
¹ Median (IQR)
# distribution of coordinates: melt to long format so latitude and longitude
# become one `Coordinate` column faceted by the original variable
ggplot(
data = melt(data_2016[, .(
Longitude = longitude_degs,
Latitude = latitude_degs,
.id
)],
id.vars =
".id", value.name = "Coordinate"
),
aes(x = Coordinate, fill = .id)
) +
geom_histogram(show.legend = F) +
facet_grid(variable ~ .id) +
theme_jjo()
Figure 1.5: Distribution of coordinates per seal
There is definitely something wrong with these coordinates (five seals would have crossed the equator…), but the representation of the track can also be improved! Here are some points to explore:
- longitude: part of the data seems to have a wrong sign, resulting in distributions that appear to be cut off
- latitude: this is less certain, but maybe the same problem occurs

Let’s try to play on the coordinates’ sign to see if we can display something that makes more sense.
# interactive map: force latitudes positive and longitudes negative to test
# whether sign errors alone explain the odd coordinates
leaflet() %>%
setView(lng = -122, lat = 50, zoom = 3) %>%
addTiles() %>%
addPolylines(
lat = data_2016[.id == "ind_3449", abs(latitude_degs)],
lng = data_2016[.id == "ind_3449", -abs(longitude_degs)],
weight = 2
)
Figure 1.6: An attempt to display the ind_3449’s track
I’ll better ask Roxanne!
# build dataset to check for missing values: one logical per cell
# (TRUE = missing), excluding identifier/time columns
dataPlot <- melt(data_2016_filter[, .(.id, is.na(.SD)), .SDcol = -c(
".id",
"rec#",
"date",
"time"
)])
# add the row index within each (variable, individual) pair for the y axis
dataPlot[, id_row := c(1:.N), by = c("variable", ".id")]
# tile plot: black tiles mark missing cells
ggplot(dataPlot, aes(x = variable, y = id_row, fill = value)) +
geom_tile() +
labs(x = "Attributes", y = "Rows") +
scale_fill_manual(
values = c("white", "black"),
labels = c("Real", "Missing")
) +
facet_wrap(.id ~ ., scales = "free_y") +
theme_jjo() +
theme(
legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1),
legend.key = element_rect(colour = "black")
)
Figure 1.7: Check for missing value in 2016-individuals
Let’s have a look at what’s inside data_nes$year_2018:
# compact view of the list structure: one data.table per 2018 individual
str(data_nes$year_2018, max.level = 1, give.attr = F, no.list = T)
## $ ind_2018070:Classes 'data.table' and 'data.frame': 22393 obs. of 46 variables:
## $ ind_2018072:Classes 'data.table' and 'data.frame': 29921 obs. of 46 variables:
## $ ind_2018074:Classes 'data.table' and 'data.frame': 38608 obs. of 46 variables:
## $ ind_2018080:Classes 'data.table' and 'data.frame': 19028 obs. of 46 variables:
A list of 4
data.frames, one for each seal
For convenience, we aggregate all 4 individuals into one dataset.
# combine all individuals
# NOTE(review): unlike the 2016 chunk, there is no `idcol = TRUE` here, yet
# a `.id` column is used below (`by = .id`) — confirm that the 2018 tables
# already carry a `.id` column; otherwise add `idcol = TRUE, use.names = TRUE`.
data_2018 <- rbindlist(data_nes$year_2018)
# display a random sample of 100 rows in an interactive table
DT::datatable(data_2018[sample.int(.N, 100), ], options = list(scrollX = T))
# per-individual summary of the raw 2018 diving data
data_2018[, .(
nb_days_recorded = uniqueN(as.Date(date)),
nb_dives = .N,
maxdepth_mean = mean(maxdepth),
dduration_mean = mean(dduration),
botttime_mean = mean(botttime),
# post-dive interval has missing values, hence na.rm
pdi_mean = mean(pdi, na.rm = T)
), by = .id] %>%
sable(
caption = "Summary diving information relative to each 2018 individual",
digits = 2
)
| .id | nb_days_recorded | nb_dives | maxdepth_mean | dduration_mean | botttime_mean | pdi_mean |
|---|---|---|---|---|---|---|
| ind_2018070 | 232 | 22393 | 305.52 | 783.27 | 243.22 | 109.55 |
| ind_2018072 | 341 | 29921 | 357.86 | 876.96 | 278.02 | 104.90 |
| ind_2018074 | 372 | 38608 | 250.67 | 686.25 | 291.89 | 302.77 |
| ind_2018080 | 215 | 19028 | 296.50 | 867.69 | 339.90 | 103.51 |
Very nice dataset :)
# build dataset to check for missing values: one logical per cell
# (TRUE = missing), excluding identifier, time and location columns
dataPlot <- melt(data_2018[, .(.id, is.na(.SD)), .SDcol = -c(
".id",
"divenumber",
"year",
"month",
"day",
"hour",
"min",
"sec",
"juldate",
"divetype",
"date",
"phase",
"lat",
"lon"
)])
# add the row index within each (variable, individual) pair for the y axis
dataPlot[, id_row := c(1:.N), by = c("variable", ".id")]
# tile plot: black tiles mark missing cells
ggplot(dataPlot, aes(x = variable, y = id_row, fill = value)) +
geom_tile() +
labs(x = "Attributes", y = "Rows") +
scale_fill_manual(
values = c("white", "black"),
labels = c("Real", "Missing")
) +
facet_wrap(.id ~ ., scales = "free_y") +
theme_jjo() +
theme(
legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1),
legend.key = element_rect(colour = "black")
)
Figure 2.1: Check for missing value in 2018-individuals
So far so good, only few variables seems to have missing values:
# Percentage of missing values per retained column (one row of percentages).
table_inter <- data_2018[, lapply(.SD, function(x) {
  # sum(is.na(x)) is the idiomatic (and faster) count of missing values
  round(sum(is.na(x)) * 100 / length(x), 1)
}), .SDcol = -c(
  ".id",
  "divenumber",
  "year",
  "month",
  "day",
  "hour",
  "min",
  "sec",
  "juldate",
  "divetype",
  "date",
  "phase",
  "lat",
  "lon"
)]
# flag columns whose missing percentage is exactly 0; vapply is type-stable,
# unlike sapply whose return type can change with its input
cond_inter <- vapply(table_inter, function(x) x == 0, logical(1))
# drop the complete columns and display only those with missing values
table_inter[, which(cond_inter) := NULL] %>%
  sable(caption = "Percentage of missing values per columns having missing values!") %>%
  scroll_box(width = "100%")
| lightatsurf | lattenuation | euphoticdepth | thermoclinedepth | driftrate | benthicdivevertrate | cornerindex | foragingindex | verticalspeed90perc | verticalspeed95perc |
|---|---|---|---|---|---|---|---|---|---|
| 26.3 | 89 | 62.6 | 1.3 | 0.5 | 22.7 | 75.8 | 0.5 | 0.1 | 0.1 |
Ok, let’s have a look at all the data. But first, we have to remove outliers. Some of them are quite easy to spot looking at the distribution of dive duration:
# distribution of dive duration before filtering; `data_2018[, .SD]` takes a
# copy so that `state := "Before"` does not modify data_2018 itself
ggplot(
data_2018[, .SD][, state := "Before"],
aes(x = dduration, fill = .id)
) +
geom_histogram(show.legend = FALSE) +
# subjective outlier threshold (3000 s)
geom_vline(xintercept = 3000, linetype = "longdash") +
facet_grid(state ~ .id,
scales = "free"
) +
labs(y = "# of dives", x = "Dive duration (s)") +
theme_jjo()
Figure 2.2: Distribution of dduration for each seal. The dashed line highlight the “subjective” threshold used to remove outliers (3000 sec)
# Distribution of dive duration after removing dives >= 3000 s.
# The filtered subset is a copy, so `state :=` does not touch data_2018.
ggplot(
  data_2018[dduration < 3000, ][, state := "After"],
  aes(x = dduration, fill = .id)
) +
  geom_histogram(show.legend = FALSE) +
  geom_vline(xintercept = 3000, linetype = "longdash") +
  facet_grid(state ~ .id,
    scales = "free"
  ) +
  # x/y labels were swapped in the previous version: x is the duration,
  # y is the count of dives (cf. the "Before" plot above)
  labs(x = "Dive duration (s)", y = "# of dives") +
  theme_jjo()
Figure 2.3: Same distribution of dduration for each seal but after removing any dduration > 3000 sec. The dashed line highlight the “subjective” threshold used to remove outliers
It seems much better, so let’s remove any rows with dduration > 3000 sec.
# keep dives shorter than 3000 s (note: this also drops NA durations)
data_2018_filter <- data_2018[dduration < 3000, ]
# count the removed rows per individual (complement of the filter above)
data_2018[dduration >= 3000, .(nb_row_removed = .N), by = .id] %>%
sable(caption = "# of rows removed by 2018-individuals")
| .id | nb_row_removed |
|---|---|
| ind_2018070 | 3 |
| ind_2018072 | 1 |
| ind_2018074 | 33 |
# median `lightatsurf` per individual, day since departure and hour
dataPlot <- data_2018[, .(lightatsurf = median(lightatsurf)),
by = .(.id, day_departure, date = as.Date(date), hour)
]
# hour-by-day heatmap of surface light level, one row of panels per seal
ggplot(dataPlot, aes(x = day_departure, y = hour, fill = lightatsurf)) +
geom_tile() +
facet_grid(.id ~ .) +
theme_jjo() +
labs(x = "# of days since departure", y = "Hour", fill = "Light level at the surface") +
theme(legend.position = c("bottom"))
Figure 2.4: Visualization of light level at the surface along 2018-individuals’ trip
# same aggregation but keeping `phase` so the fill shows day/night detection
dataPlot <- data_2018[, .(lightatsurf = median(lightatsurf)),
by = .(.id, day_departure, date = as.Date(date), hour, phase)
]
# hour-by-day heatmap of the detected phase of the day
ggplot(dataPlot, aes(x = day_departure, y = hour, fill = phase)) +
geom_tile() +
facet_grid(.id ~ .) +
theme_jjo() +
labs(x = "# of days since departure", y = "Hour", fill = "Day time and night time as detected by the `cal_phase_day` function") +
theme(legend.position = c("bottom"))
Figure 2.5: Visualization of detected night time and day time along 2018-individuals’ trip
# variables to plot: every column except identifiers, date/time components,
# location, and variables displayed separately (euphotic/thermocline depths)
names_display <- names(data_2018_filter[, -c(
".id",
"date",
"divenumber",
"year",
"month",
"day",
"hour",
"min",
"sec",
"juldate",
"divetype",
"euphoticdepth",
"thermoclinedepth",
"day_departure",
"phase",
"lat",
"lon"
)])
# One section per displayed variable: time series of its value per individual.
# `cat()` emits markdown headers so knitr renders one tabbed section each.
for (i in names_display) {
cat("####", i, "{-} \n")
# special case: maxdepth is overlaid with thermocline and euphotic depths
if (i == "maxdepth") {
print(
ggplot() +
geom_point(
data = data_2018_filter[, .(
.id,
date,
thermoclinedepth
)],
aes(
x = as.Date(date),
y = -thermoclinedepth,
colour = "Thermocline (m)"
),
alpha = .2,
size = .5
) +
geom_point(
data = data_2018_filter[, .(
.id,
date,
euphoticdepth
)],
aes(
x = as.Date(date),
y = -euphoticdepth,
colour = "Euphotic (m)"
),
alpha = .2,
size = .5
) +
scale_colour_manual(
values = c(
"Thermocline (m)" = "red",
"Euphotic (m)" = "black"
),
name = "Zone"
) +
# start a second colour scale (ggnewscale) for the per-individual points
new_scale_color() +
geom_point(
data = melt(data_2018_filter[, .(.id, date, get(i))], id.vars = c(".id", "date")),
aes(
x = as.Date(date),
y = -value,
col = .id
),
alpha = 1 / 10,
size = .5,
show.legend = FALSE
) +
facet_wrap(. ~ .id, scales = "free") +
scale_x_date(date_labels = "%m/%Y") +
labs(x = "Date", y = "Maximum Depth (m)") +
theme_jjo() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
)
cat("<blockquote> Considering `ind_2018074` has slightly different values than other individuals for the thermocline depth, it would be interesting to see where the animal went. </blockquote>")
# special case: driftrate is coloured by dive type instead of individual
} else if (i == "driftrate") {
print(
ggplot(
data = melt(data_2018_filter[, .(.id, date, get(i), divetype)], id.vars = c(".id", "date", "divetype")),
aes(
x = as.Date(date),
y = value,
col = divetype
)
) +
geom_point(
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
scale_x_date(date_labels = "%m/%Y") +
labs(x = "Date", y = "Drift Rate 'm/s", col = "Dive Type") +
theme_jjo() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
) +
guides(colour = guide_legend(override.aes = list(
size = 7,
alpha = 1
)))
)
# default: simple per-individual time series of the current variable
} else {
print(
ggplot(
data = melt(data_2018_filter[, .(.id, date, get(i))], id.vars = c(".id", "date")),
aes(
x = as.Date(date),
y = value,
col = .id
)
) +
geom_point(
show.legend = FALSE,
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
scale_x_date(date_labels = "%m/%Y") +
labs(x = "Date", y = i) +
theme_jjo() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
)
}
cat(" \n \n")
}
Considering ind_2018074 has slightly different values than other individuals for the thermocline depth, it would be interesting to see where the animal went.
A few questions that I should look into:
- is the bimodal distribution of `dduration` and `desctime` due to nycthemeral migration?
- is the bimodal distribution of `descrate` (especially for `ind_2018070` and `ind_2018072`) due to drift dives?
- could `lightatbott` be used to identify bioluminescence, since there seems to be a lot going on at the bottom?
- are the variations observed for `lightatsurf` due to the moon cycle?
- not sure why there is a bimodal distribution of `tempatbott`!
- `driftrate`: that one is awesome! Thanks to `divetype` we can clearly see a pattern of how drift rate (and so buoyancy) changes over time.
- the bimodal distribution of `verticalspeed90` and `verticalspeed95` should be due to drift dives.
# same per-variable time series as above, but coloured by phase of the day
for (i in names_display) {
cat("####", i, "{-} \n")
print(
ggplot(
data = melt(data_2018_filter[, .(.id, date, get(i), phase)],
id.vars = c(
".id",
"date",
"phase"
)
),
aes(
x = as.Date(date),
y = value,
col = phase
)
) +
geom_point(
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
scale_x_date(date_labels = "%m/%Y") +
labs(x = "Date", y = i) +
theme_jjo() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
) +
# enlarge legend keys and make them opaque for readability
guides(colour = guide_legend(override.aes = list(
size = 7,
alpha = 1
)))
)
cat(" \n \n")
}
# Focus on the first month of the trip (day_departure < 32):
# one boxplot per day since departure, per variable and individual.
for (i in names_display) {
cat("####", i, "{-} \n")
# maxdepth again overlaid with thermocline and euphotic depths
if (i == "maxdepth") {
print(
ggplot() +
geom_point(
data = data_2018_filter[day_departure < 32, .(
.id,
day_departure,
thermoclinedepth
)],
aes(
x = day_departure,
y = -thermoclinedepth,
colour = "Thermocline (m)",
group = day_departure
),
alpha = .2,
size = .5
) +
geom_point(
data = data_2018_filter[day_departure < 32, .(
.id,
day_departure,
euphoticdepth
)],
aes(
x = day_departure,
y = -euphoticdepth,
colour = "Euphotic (m)",
group = day_departure
),
alpha = .2,
size = .5
) +
scale_colour_manual(
values = c(
"Thermocline (m)" = "red",
"Euphotic (m)" = "black"
),
name = "Zone"
) +
# second colour scale for the daily depth boxplots
new_scale_color() +
geom_boxplot(
data = melt(data_2018_filter[day_departure < 32, .(.id, day_departure, get(i))], id.vars = c(".id", "day_departure")),
aes(
x = day_departure,
y = -value,
col = .id,
group = day_departure
),
alpha = 1 / 10,
size = .5,
show.legend = FALSE
) +
facet_wrap(. ~ .id, scales = "free") +
labs(x = "# days since departure", y = "Maximum Depth (m)") +
theme_jjo() +
theme(legend.position = "bottom")
)
} else {
# default: daily boxplots of the current variable
print(
ggplot(
data = melt(data_2018_filter[day_departure < 32, .(.id, day_departure, get(i))], id.vars = c(".id", "day_departure")),
aes(
x = day_departure,
y = value,
color = .id,
group = day_departure
)
) +
geom_boxplot(
show.legend = FALSE,
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
labs(x = "# days since departure", y = i) +
theme_jjo()
)
}
cat(" \n \n")
}
# Daily boxplots restricted to drift dives during the first month.
# NOTE(review): this chunk reuses the loop variable `i` left over from the
# preceding for-loop (i.e. its last value) — confirm which variable is
# intended and consider assigning it explicitly before this call.
print(
ggplot(
data = melt(
data_2018_filter[day_departure < 32 &
divetype == "2: drift", .(.id, day_departure, get(i), divetype)],
id.vars = c(".id", "day_departure", "divetype")
),
aes(
x = day_departure,
y = value,
color = divetype,
group = day_departure
)
) +
geom_boxplot(
show.legend = FALSE,
alpha = 1 / 10,
size = .5
) +
facet_wrap(. ~ .id, scales = "free") +
labs(x = "# days since departure", y = i) +
theme_jjo()
)
# Daily boxplots (first month) per variable, split by phase of the day.
for (i in names_display) {
  cat("####", i, "{-} \n")
  print(
    ggplot(
      data = melt(data_2018_filter[
        day_departure < 32,
        .(.id, day_departure, get(i), phase)
      ],
      id.vars = c(".id", "day_departure", "phase")
      ),
      # one box per (day, phase) pair; the previous version left a trailing
      # comma in aes(), i.e. a dangling empty argument
      aes(
        x = day_departure,
        y = value,
        color = phase,
        group = interaction(day_departure, phase)
      )
    ) +
      geom_boxplot(
        alpha = 1 / 10,
        size = .5
      ) +
      facet_wrap(. ~ .id, scales = "free") +
      labs(x = "# days since departure", y = i) +
      theme_jjo() +
      theme(legend.position = "bottom")
  )
  cat(" \n \n")
}
Can we find nice correlation?
# pairwise correlations between the displayed variables, rounded to 1 digit
corr_2018 <- round(cor(data_2018_filter[, names_display, with = F],
use = "pairwise.complete.obs"
), 1)
# replace NA correlations (e.g. constant columns) by 0
corr_2018[is.na(corr_2018)] <- 0
# p-values of the correlations (ggcorrplot helper)
corr_p_2018 <- cor_pmat(data_2018_filter[, names_display, with = F])
# replace NA p-values by 1 so they are treated as non-significant
corr_p_2018[is.na(corr_p_2018)] <- 1
# display the hierarchically-ordered lower-triangle correlogram
ggcorrplot(
corr_2018,
p.mat = corr_p_2018,
hc.order = TRUE,
method = "circle",
type = "lower",
ggtheme = theme_jjo(),
sig.level = 0.05,
colors = c("#00AFBB", "#E7B800", "#FC4E07")
)
Figure 2.6: Correlation matrix (crosses indicate non significant correlation)
Another way to see it:
# flatten the correlation matrix into (row, column, cor, p) rows
cor_result_2018 <- flat_cor_mat(corr_2018, corr_p_2018)
# keep only pairs with correlation >= 0.7, strongest first; the caption now
# matches the threshold actually used (it previously said 0.75)
cor_result_2018[cor >= .7, ][order(-abs(cor))] %>%
  sable(caption = "Pairwise correlation above 0.7 and associated p-values")
| row | column | cor | p |
|---|---|---|---|
| verticalspeed90perc | verticalspeed95perc | 1.0 | 0 |
| maxdepth | asctime | 0.8 | 0 |
| botttime | efficiency | 0.8 | 0 |
| dwigglesbott | foragingindex | 0.8 | 0 |
| maxdepth | dduration | 0.7 | 0 |
| maxdepth | desctime | 0.7 | 0 |
| dduration | desctime | 0.7 | 0 |
| dduration | asctime | 0.7 | 0 |
| totvertdistbot | bottrange | 0.7 | 0 |
| totvertdistbot | verticalspeed90perc | 0.7 | 0 |
| totvertdistbot | verticalspeed95perc | 0.7 | 0 |
I guess nothing unexpected here, I’ll have to check with Patrick about the efficiency ;)
# proportion of each dive type per individual and day since departure:
# sum_id = dives per (.id, day); sum_id_days = dives per (.id, day, divetype).
# NOTE: `:=` adds these helper columns to data_2018_filter as a side effect
data_2018_filter[, sum_id := .N, by = .(.id, day_departure)][, sum_id_days := .N, by = .(.id, day_departure, divetype)][, prop := sum_id_days /
sum_id]
dataPlot <- unique(data_2018_filter[, .(prop, .id, divetype, day_departure)])
# stacked area plot of dive-type proportions along the trip
ggplot(dataPlot, aes(
x = as.numeric(day_departure),
y = prop,
fill = as.character(divetype)
)) +
geom_area(alpha = 0.6, size = 1) +
facet_wrap(.id ~ ., scales = "free") +
theme_jjo() +
theme(legend.position = "bottom") +
labs(x = "# of days since departure", y = "Proportion of dives", fill = "Dive types")
Figure 2.7: Proportion dive types
# Dive duration against maximum depth, one panel per 2018 individual
ggplot(data_2018_filter) +
  aes(x = maxdepth, y = dduration, colour = .id) +
  geom_point(show.legend = FALSE, size = 0.5, alpha = 0.1) +
  facet_wrap(vars(.id)) +
  labs(x = "Maximum depth (m)", y = "Dive duration (s)") +
  theme_jjo()
Figure 2.8: Dive duration vs. Maximum Depth colored 2018-individuals
# Same scatter, coloured by dive type; legend keys enlarged and opaque
ggplot(data_2018_filter) +
  aes(x = maxdepth, y = dduration, colour = divetype) +
  geom_point(size = 0.5, alpha = 0.1) +
  facet_wrap(vars(.id)) +
  guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
  labs(x = "Maximum depth (m)", y = "Dive duration (s)") +
  theme_jjo() +
  theme(legend.position = "bottom")
Figure 2.9: Dive duration vs. Maximum Depth colored by Dive Type
# NOTE: `:=` adds `prop_track` (percentage of the track completed) to
# data_2018_filter as a side effect while building the plot data
ggplot(data = data_2018_filter[, prop_track := (day_departure * 100) / max(day_departure), by = .id], aes(y = dduration, x = maxdepth, col = prop_track)) +
geom_point(size = .5, alpha = .1) +
facet_wrap(.id ~ .) +
labs(x = "Maximum depth (m)", y = "Dive duration (s)", col = "Proportion of completed track (%)") +
scale_color_continuous(type = "viridis") +
theme_jjo() +
theme(legend.position = "bottom")
Figure 2.10: Dive duration vs. Maximum Depth colored by # days since departure
# Same scatter, coloured by the detected phase of the day
ggplot(data_2018_filter) +
  aes(x = maxdepth, y = dduration, colour = phase) +
  geom_point(size = 0.5, alpha = 0.1) +
  facet_wrap(vars(.id)) +
  guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) +
  labs(x = "Maximum depth (m)", y = "Dive duration (s)", col = "Phases of the day") +
  theme_jjo() +
  theme(legend.position = "bottom")
Figure 2.11: Dive duration vs. Maximum Depth colored by phases of the day
There seems to be a patch of high depths (especially visible for
ind_2018070), but I don’t know what it could be linked to…
In the following graphs:
- `driftrate` is calculated using only `divetype == "2: drift"`
- whereas all the other variables are calculated with all dives considered
# build daily dataset: median drift rate (computed on drift dives only)
# joined onto daily medians computed over all dives
dataPlot <- data_2018_filter[divetype == "2: drift",
# median drift rate for drift dives
.(driftrate = median(driftrate, na.rm = T)),
by = .(.id, day_departure)
][data_2018_filter[,
.(
# median dive duration, all dives considered
dduration = median(dduration, na.rm = T),
# median max depth, all dives considered
maxdepth = median(maxdepth, na.rm = T),
# median bottom time, all dives considered
botttime = median(botttime, na.rm = T)
),
by = .(.id, day_departure)
],
# data.table join: x[i, on = ...] keeps every row of the inner table,
# so individual-days without drift dives get NA driftrate
on = c(".id", "day_departure")
]
# drift rate vs bottom time, with a per-individual linear fit
ggplot(dataPlot, aes(x = botttime, y = driftrate, col = .id)) +
geom_point(size = .5, alpha = .5) +
geom_smooth(method = "lm") +
# NOTE(review): guides(color = FALSE) is deprecated in recent ggplot2;
# prefer guides(color = "none") once the ggplot2 version is confirmed
guides(color = FALSE) +
facet_wrap(.id ~ .) +
# x limit drops daily medians above 700 s from the panel
scale_x_continuous(limits = c(0, 700)) +
labs(x = "Daily median Bottom time (s)", y = "Daily median drift rate (m.s-1)") +
theme_jjo()
Figure 2.12: Drift rate vs. Bottom time
# drift rate vs maximum depth, with a per-individual linear fit
ggplot(dataPlot, aes(x = maxdepth, y = driftrate, col = .id)) +
geom_point(size = .5, alpha = .5) +
geom_smooth(method = "lm") +
guides(color = FALSE) +
facet_wrap(.id ~ .) +
labs(x = "Daily median Maximum depth (m)", y = "Daily median drift rate (m.s-1)") +
theme_jjo()
Figure 2.13: Drift rate vs. Maximum depth
# drift rate vs dive duration, with a per-individual linear fit
ggplot(dataPlot, aes(x = dduration, y = driftrate, col = .id)) +
geom_point(size = .5, alpha = .5) +
geom_smooth(method = "lm") +
guides(color = FALSE) +
facet_wrap(.id ~ .) +
labs(x = "Daily median Dive duration (s)", y = "Daily median drift rate (m.s-1)") +
theme_jjo()
Figure 2.14: Drift rate vs. Dive duration
# This piece of code is only there to show how to draw a polyline with a
# gradient color using leaflet. We're not using it due to the size of the
# created map, and will continue using circle markers.
# dataset used to display the map (one seal, drift dives only)
df_driftrate <- unique(data_2018_filter[
  .id == "ind_2018070" & divetype == "2: drift",
  .(.id, lat, lon, dduration)
])
# color palette mapped onto dive duration
pal <- colorNumeric(
  palette = "YlGnBu",
  domain = df_driftrate$dduration
)
# data.table::shift() defaults to a lag, so each row is paired with the
# PREVIOUS position — the old `nextLat`/`nextLon` names were misleading
df_driftrate[, `:=`(
  prevLat = shift(lat),
  prevLon = shift(lon),
  color = pal(dduration)
)]
# interactive map
gradient_map <- leaflet() %>%
  setView(lng = -122, lat = 38, zoom = 2) %>%
  addTiles()
# one coloured segment per consecutive pair of positions;
# seq_len() stays correct for an empty table (1:nrow would give c(1, 0))
for (i in seq_len(nrow(df_driftrate))) {
  gradient_map <- addPolylines(
    map = gradient_map,
    data = df_driftrate,
    lat = as.numeric(df_driftrate[i, c("lat", "prevLat")]),
    lng = as.numeric(df_driftrate[i, c("lon", "prevLon")]),
    color = df_driftrate[i, color],
    weight = 3,
    group = "individual_1"
  )
}
# add layer control
gradient_map <- addLayersControl(
  map = gradient_map,
  overlayGroups = c("individual_1"),
  options = layersControlOptions(collapsed = FALSE)
)
# format(object.size(gradient_map), units = "Mb")
# Interactive map: one circle-marker layer per (individual x variable).
gradient_map <- leaflet() %>%
  setView(lng = -132, lat = 48, zoom = 4) %>%
  addTiles()
# Individuals with at least one located dive, computed ONCE so the same
# ordering is used for both filtering and layer names (the previous version
# indexed two different unique() calls — with and without the !is.na(lat)
# filter — which could disagree).
ids <- data_2018_filter[!is.na(lat), unique(.id)]
# collected layer-group names
grps <- NULL
for (i in seq_along(ids)) {
  for (k in c("dduration", "maxdepth", "efficiency", "driftrate")) {
    if (k == "driftrate") {
      # drift rate is only meaningful for drift dives; fixed [-1, 1] domain
      dataPlot <- unique(data_2018_filter %>%
        .[order(date), ] %>%
        .[.id == ids[i] &
          divetype == "2: drift" &
          !is.na(get(k)),
        c("lat", "lon", k),
        with = FALSE
        ])
      colPal <- colorNumeric(
        palette = "BrBG",
        domain = seq(-1, 1, 0.1)
      )
    } else {
      # other variables: non-drift dives, data-driven palette domain
      dataPlot <- unique(data_2018_filter %>%
        .[order(date), ] %>%
        .[.id == ids[i] &
          divetype != "2: drift" &
          !is.na(get(k)),
        c("lat", "lon", k),
        with = FALSE
        ])
      colPal <- colorNumeric(
        palette = "YlGnBu",
        domain = dataPlot[, get(k)]
      )
    }
    # per-point colour and marker size
    dataPlot[, color := colPal(dataPlot[, get(k)])]
    dataPlot[, radius := 3]
    # mark the beginning (green) and the end (red) of the trip
    dataPlot[1, `:=`(color = "green", radius = 4)]
    dataPlot[.N, `:=`(color = "red", radius = 4)]
    # one layer group per individual x variable, named consistently with ids
    grp <- paste(ids[i], "-", k)
    gradient_map <- addCircleMarkers(
      map = gradient_map,
      data = dataPlot,
      lat = ~lat,
      lng = ~lon,
      radius = ~radius,
      stroke = FALSE,
      color = ~color,
      fillOpacity = 1,
      group = grp
    ) %>%
      addLegend("bottomleft",
        data = dataPlot,
        group = grp,
        pal = colPal,
        values = ~get(k),
        title = k,
        opacity = 1
      )
    # collect the group name for the layer control
    grps <- c(grps, grp)
  }
}
# add layer control; every group starts hidden
gradient_map <- addLayersControl(
  map = gradient_map,
  overlayGroups = grps,
  options = layersControlOptions(collapsed = TRUE)
) %>% hideGroup(grps)
# display
gradient_map
Figure 2.15: Tracking data 2018 individuals